{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6fc0fd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# References\n",
    "# - Separator / special tokens added by encode(): https://blog.csdn.net/pearl8899/article/details/119328276\n",
    "# - Hugging Face BERT documentation: https://huggingface.co/docs/transformers/model_doc/bert\n",
    "# - Hugging Face quick start (using bert-base-chinese as the example): https://zhuanlan.zhihu.com/p/610171544"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f09f7e9f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([1, 6, 768])\n",
      "torch.Size([1, 768])\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "from transformers import BertModel, BertTokenizer\n",
    "\n",
    "# Load the pretrained Chinese BERT tokenizer and encoder.\n",
    "tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')\n",
    "model = BertModel.from_pretrained('bert-base-chinese')\n",
    "model.eval()  # inference only: make sure dropout is disabled\n",
    "\n",
    "# encode() adds the special tokens, so the 4 characters become 6 ids:\n",
    "#   [CLS] 我 是 学 生 [SEP]\n",
    "#     0   1  2  3  4    5\n",
    "input_ids = torch.tensor(tokenizer.encode(\"我是学生\")).unsqueeze(0)  # batch size 1\n",
    "\n",
    "# Feature extraction needs no gradients, so do not build the autograd graph.\n",
    "with torch.no_grad():\n",
    "    outputs = model(input_ids)\n",
    "\n",
    "# Index access works whether the model returns a tuple or a ModelOutput.\n",
    "sequence_output = outputs[0]  # last hidden state: one vector per token\n",
    "pooled_output = outputs[1]    # pooled output: one vector for the whole sentence\n",
    "print(sequence_output.shape)  # per-token (character) vectors\n",
    "print(pooled_output.shape)    # sentence vector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b4be7c89",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 101, 2769, 3221, 2110, 4495,  102]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Token ids of the encoded sentence; 101 is [CLS] and 102 is [SEP].\n",
    "input_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "0840ee99",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([ 8.3692e-01, -2.9698e-01,  6.1697e-01,  3.5656e-01, -1.1165e-01,\n",
       "        -2.3632e-01,  3.9202e-01,  2.7072e-01, -2.2060e-01,  4.3047e-01,\n",
       "        -3.7540e-01,  6.4582e-01, -5.3593e-01, -4.1767e-01,  4.7740e-02,\n",
       "         1.6274e-01,  7.1593e-01,  5.7389e-01, -1.2901e-01, -5.8728e-01,\n",
       "         3.5399e-01, -1.5324e-01, -9.9516e-02, -2.5790e-01,  6.4236e-01,\n",
       "        -4.5778e-01, -5.1912e-01, -7.6143e-01,  9.4854e-01,  4.6397e-01,\n",
       "        -6.1207e-01,  1.6577e-01, -6.7345e-01,  4.6260e-02, -7.2574e-01,\n",
       "        -1.0402e+00, -4.3579e-01, -5.6851e-01, -3.8117e-01, -4.5302e-01,\n",
       "         2.9355e-01,  2.7310e-04,  2.8747e-01, -3.8367e-01, -1.7324e-01,\n",
       "        -5.0885e-01, -8.8334e-01,  1.8203e-01,  2.6023e-01,  2.9225e-01,\n",
       "         6.0834e-01,  7.3961e+00, -5.2391e-01,  1.0304e+00, -4.6132e-01,\n",
       "         6.7003e-01,  7.3994e-01, -7.4058e-01, -1.1607e-01,  5.5337e-01,\n",
       "        -1.0852e+00, -2.4527e-02, -1.2338e-01,  2.4334e-01,  6.6001e-02,\n",
       "        -5.9785e-01, -3.3717e-02, -2.9076e-01,  7.8016e-02, -5.8602e-03,\n",
       "         6.5949e-02, -5.5169e-01,  5.2782e-01,  8.8298e-01,  6.2469e-02,\n",
       "        -1.9270e-01,  6.8793e-02,  4.7722e-01,  3.3624e-02, -8.4096e-01,\n",
       "        -1.9586e-01, -2.6021e-01, -1.8254e-02, -9.3378e-01, -8.7842e-02,\n",
       "        -4.6636e-01, -8.3304e-01,  3.5682e-01,  2.9424e-01,  1.1283e+00,\n",
       "        -7.7577e-02, -3.7459e-01,  3.8992e-02,  6.1942e-01, -2.6361e-01,\n",
       "         3.3725e-01,  2.9129e-01, -7.8157e-01, -3.9678e-01,  4.4749e-01,\n",
       "         1.2684e-01, -1.7987e-01, -1.6414e-01, -3.6908e-01, -2.2849e-01,\n",
       "        -1.6734e-01, -1.7638e-01,  1.2785e-01, -3.5581e-01,  4.8744e-01,\n",
       "        -5.1801e-01,  3.1893e-02,  4.4007e-01,  3.5942e-01, -8.4325e-01,\n",
       "         2.3074e-01, -1.2684e-01,  1.2906e+00,  2.4113e-01,  3.0093e-01,\n",
       "         2.0414e-01,  1.0142e+00, -1.7710e-01, -7.1706e-01, -1.7813e-02,\n",
       "        -2.0346e-01, -3.2652e-01,  6.7010e-02, -9.4467e-02,  4.9821e-01,\n",
       "         2.1099e-01, -4.4247e-01,  3.7072e-02,  3.3634e-03, -3.0127e-02,\n",
       "         3.9247e-01, -5.7319e-01, -1.0218e-01, -2.3727e-02, -3.3548e-01,\n",
       "         6.1738e-01, -3.6678e-01, -7.8701e-01,  1.1084e-01,  3.5802e-01,\n",
       "        -3.6771e-01,  2.8280e-01, -2.1462e-01,  1.4304e-01, -1.5203e-01,\n",
       "         3.2479e-01, -3.4279e-01, -8.4956e-01,  8.8223e-01, -8.7112e-02,\n",
       "        -8.7443e-01,  3.0299e-01,  3.8775e-01,  1.3491e-01, -3.6613e-01,\n",
       "         1.7790e-01,  2.7796e-01,  4.5344e-01,  4.9444e-01,  8.8861e-02,\n",
       "        -2.6893e-01,  9.9308e-02, -2.0170e-01,  2.4445e-02, -6.1437e-02,\n",
       "         2.6066e-01,  3.8781e-02, -1.7370e-01,  3.6284e-01, -1.1697e+00,\n",
       "         3.4677e-01, -7.2954e-01,  6.5914e-01, -2.4918e-02, -4.8434e-01,\n",
       "        -2.0962e-01, -2.1221e-01,  6.4146e-01, -3.1913e-01, -1.1117e-01,\n",
       "         1.6504e-01, -4.4417e-01,  3.1095e-02, -1.3116e-01,  3.5513e-02,\n",
       "        -9.5258e-01, -1.0894e+00,  3.0512e-01,  2.3262e-01,  3.7556e-01,\n",
       "         7.4838e-01, -4.5646e-01, -3.7382e-01,  3.6663e-02,  4.1476e-01,\n",
       "         3.6550e-01, -1.7057e-02, -6.2433e-01,  2.0932e-01,  1.2972e-02,\n",
       "         6.4852e-01, -1.7323e-01, -9.7677e-02,  3.5344e-01,  1.1045e-01,\n",
       "        -8.3590e-02, -9.6595e-01, -6.4936e-01,  5.7648e-01,  5.0494e-01,\n",
       "        -5.1005e-01, -1.0216e-01,  2.5787e-01,  1.7271e-02,  2.0693e-01,\n",
       "         7.2802e-01, -2.6261e-01,  9.3349e-01, -6.4948e-01, -2.9654e-01,\n",
       "         5.0090e-01,  3.1703e-01,  1.3208e-01, -1.8830e-01, -8.1405e-01,\n",
       "         4.1221e-01,  2.9060e-01, -3.6861e-01,  5.3240e-01,  1.6886e-01,\n",
       "         3.2117e-01,  4.9114e-01,  2.2716e-01,  4.6554e-02, -5.3572e-01,\n",
       "         2.5527e-02,  2.8151e-01, -7.8137e-02, -4.4728e-01, -7.1191e-02,\n",
       "         2.9442e-01, -5.2292e-01,  2.6210e-01,  3.1112e-01,  1.2889e-01,\n",
       "         1.9889e-01, -3.5100e-01, -2.9951e-01,  2.4461e-01, -8.4813e-01,\n",
       "         1.2358e+00, -2.7961e-01, -6.8421e-01, -8.9036e-01,  2.5984e-01,\n",
       "        -6.2673e-01,  6.6672e-01, -5.0767e-02,  9.0397e-02,  7.7125e-01,\n",
       "        -2.6405e-01,  3.6601e-01, -8.5588e-02,  1.0245e-01, -3.0054e-01,\n",
       "        -4.8724e-01, -2.1904e-01, -4.5779e-01, -2.3083e-01,  4.8324e-03,\n",
       "         4.9279e-01,  4.3285e-01,  6.9704e-01,  2.7323e-01, -6.5964e-01,\n",
       "         6.6186e-01,  1.7550e-01,  8.4253e-02, -7.6291e-01, -1.5876e-01,\n",
       "         1.3326e-01, -2.4092e-01,  1.6389e+00,  5.3009e-01,  5.1253e-01,\n",
       "        -7.6899e-01, -5.7243e-01,  3.7693e-01,  1.1841e-01,  9.7298e-01,\n",
       "         1.1737e+00, -6.7640e-01, -2.8960e-01,  1.2640e-01, -5.5437e-01,\n",
       "        -7.9017e-01,  4.2780e-01,  4.4650e-01,  4.2949e-01,  9.9563e-01,\n",
       "        -3.6347e-01,  8.0008e-01,  8.8920e-01,  9.9499e-02, -8.9435e-01,\n",
       "        -7.9520e-02, -5.1153e-01, -5.1947e-01, -6.9671e-02, -1.4471e+00,\n",
       "         3.1027e-04, -2.0462e-01, -5.2074e-01, -2.2388e-03, -5.1269e-01,\n",
       "         2.4107e-02,  3.4303e-01,  1.6678e-01, -2.0408e-01, -3.8420e-01,\n",
       "         2.2515e-02, -1.6595e-01,  1.1203e-01, -1.2186e-01,  4.6209e-01,\n",
       "         2.1108e-01, -7.2246e-01,  3.9302e-01, -4.9753e-02,  3.6139e-01,\n",
       "         6.4899e-01, -2.4715e-01,  7.8257e-01, -9.5455e-01, -2.9947e-02,\n",
       "         6.4662e-01, -4.6693e-01,  4.8387e-01, -1.6506e-01,  2.9047e-01,\n",
       "         2.2147e-01,  2.1748e-01,  2.0707e-01, -4.9122e-03, -6.4039e-01,\n",
       "         4.0603e-01,  1.6662e-01, -3.5345e-01, -3.6534e-02, -7.8519e-01,\n",
       "         3.6083e-01,  2.3305e-01,  3.2447e-01,  5.0041e-01, -2.3579e-01,\n",
       "         3.5835e-01, -3.4092e-01, -2.6960e-01, -5.9336e-01,  7.7285e-01,\n",
       "         3.9523e-01, -4.3265e-02,  7.5875e-01,  4.8365e-01, -6.2997e-01,\n",
       "         6.8267e-01, -2.4479e-01,  1.5094e+00, -7.2348e-01, -5.1014e-02,\n",
       "        -2.7385e-01, -1.0227e-01,  2.6990e-01,  5.9239e-02, -4.2774e-01,\n",
       "        -8.2698e-02, -4.1665e-01,  9.8200e-02, -9.3886e-01,  1.3246e-01,\n",
       "         2.5292e-01, -1.4582e-01, -1.8122e-01,  3.7358e-01, -9.9002e-02,\n",
       "        -1.2951e-01, -2.2203e-01, -1.7573e-02,  2.4565e-01, -1.9032e-01,\n",
       "         2.2666e-01, -1.6055e+00, -2.3109e-01, -1.0365e-01, -2.7217e-01,\n",
       "        -2.9582e-01, -3.4259e-01, -5.3555e-01, -1.7046e-01,  6.2371e-01,\n",
       "         5.0679e-01,  5.8585e-01, -9.2537e-04,  1.3395e+00,  2.9830e-01,\n",
       "        -2.2422e-01,  7.7174e-01, -1.3431e-01, -2.4687e-01, -3.1105e-01,\n",
       "        -2.0391e-01,  3.8577e-01, -1.8698e-01,  3.9097e-01, -5.3084e-02,\n",
       "         3.8402e-01,  4.3504e-01, -4.0663e-01,  1.6424e-01, -4.6704e-01,\n",
       "        -7.5209e-01, -4.4655e-01, -8.1338e-01, -1.3433e-01,  1.0475e-01,\n",
       "        -1.0163e-01, -7.4115e-01, -1.3912e-01,  1.1766e+00, -2.6190e-01,\n",
       "         2.6101e-01, -4.2956e-01,  5.2893e-01,  1.6265e-01, -1.1154e-01,\n",
       "        -9.4365e-02, -3.4523e-01,  2.7761e-01,  1.9385e-01, -5.4801e-02,\n",
       "        -6.9019e-01,  3.9662e-01, -2.9210e-01,  3.0084e-01,  2.5701e-01,\n",
       "         5.3848e-01,  4.0344e-01,  1.4256e-01, -1.0482e-01,  1.9089e-01,\n",
       "        -4.6219e-01,  5.3058e-01,  1.7645e-02,  2.5781e-02,  6.4221e-02,\n",
       "        -2.1513e-01, -2.8422e-02,  5.8446e-01, -1.7335e-01,  7.4494e-01,\n",
       "         1.6190e-01,  1.2874e-02, -2.4826e-01,  6.5677e-02,  5.1666e-01,\n",
       "        -3.7978e-01,  6.4886e-01, -3.6631e-01,  4.7916e-01,  5.3845e-02,\n",
       "         3.4238e-01, -1.8735e-01, -5.7350e-01, -1.1850e+00, -1.1555e+00,\n",
       "        -7.8511e-02,  3.7784e-01,  1.1985e+00,  9.1022e-03,  3.2067e-01,\n",
       "        -9.5311e-01, -3.9096e-01, -4.1948e-01, -1.3803e-01, -3.9071e-02,\n",
       "        -4.8322e-01, -7.0463e-01,  3.3332e-01, -1.8334e-01, -4.1431e-01,\n",
       "        -4.1145e-01, -1.0030e-01, -2.1617e-01,  2.5895e-01,  1.1511e-01,\n",
       "        -3.6548e-01,  1.6189e-02, -1.8988e-01, -1.0121e-01, -9.4315e-01,\n",
       "        -1.5857e-01, -2.5809e-01,  6.8455e-02, -1.6903e-01, -1.0060e-01,\n",
       "        -7.1982e-02, -1.3446e+00, -5.1716e-02, -1.0010e+00, -5.6016e-01,\n",
       "        -2.1234e-01, -1.0113e+00,  2.4279e-01, -4.4098e-01, -3.3731e-01,\n",
       "         8.5900e-02, -4.4836e-01,  4.1499e-02,  3.6389e-01,  5.6362e-02,\n",
       "        -1.4006e-01, -2.0756e-01, -3.4165e-01,  2.3092e-01,  1.2024e+00,\n",
       "         3.5617e-01,  5.3700e-02,  2.8141e-01, -6.2055e-01, -1.3663e-01,\n",
       "        -2.9199e-01,  6.6677e-01, -8.3870e-01, -2.5607e-01,  3.6735e-01,\n",
       "         2.9860e-01, -7.3617e-01,  8.6571e-01,  9.1164e-01, -3.6860e-01,\n",
       "        -5.6025e-01, -7.1504e-01, -5.4183e-01,  4.4770e-01,  4.1260e-01,\n",
       "        -2.4055e-01, -5.6490e-01, -1.7087e-01,  7.8932e-02, -2.1281e-01,\n",
       "        -3.8585e-01, -2.4022e-01, -6.0445e-01,  9.1187e-01, -2.7925e-01,\n",
       "         3.2891e-01,  3.4985e-01,  1.2942e-01, -8.5534e-01, -6.6757e-01,\n",
       "        -8.2856e-03, -3.6998e-01, -3.5427e-01, -3.7386e-01,  1.1585e-01,\n",
       "        -3.3563e-01, -6.0020e-01, -1.0231e+00, -1.3342e-01,  2.3045e-01,\n",
       "        -1.5942e-01, -8.2551e-02,  5.8119e-01,  4.3905e-02, -7.4320e-01,\n",
       "         2.7728e-01,  3.0346e-01,  4.5193e-01,  3.7062e-01,  1.3825e-01,\n",
       "         2.9573e-01,  3.5192e-01, -2.0642e-01,  5.4953e-01,  2.8852e-01,\n",
       "         2.7700e-01,  2.1242e-01, -2.7312e-01, -4.2732e-02, -1.9065e-01,\n",
       "        -2.6462e-01,  7.6145e-01, -2.4381e-01,  1.0690e-01,  6.4940e-01,\n",
       "         8.3298e-01,  8.0127e-02,  1.5598e-02,  1.8566e-01,  8.7733e-02,\n",
       "        -5.8021e-01,  3.8904e-01, -1.7654e-01, -9.9363e-01,  2.3873e-02,\n",
       "        -4.9414e-01, -4.0573e-01,  2.9534e-01, -3.1996e-01, -5.5258e-01,\n",
       "         3.6849e-01, -8.0381e-01, -5.4214e-03, -1.1757e-01, -6.3311e-01,\n",
       "         1.8393e-01, -6.2470e-01,  3.5195e-01, -2.0297e-02, -4.3241e-01,\n",
       "        -2.8790e-01,  6.0056e-01,  1.1130e-02,  9.5271e-01,  2.2163e-02,\n",
       "        -2.0929e-01, -2.9218e-01, -5.4190e-01, -2.8057e-02, -2.3961e-01,\n",
       "        -5.1923e-01, -9.2945e-02,  5.1530e-01, -3.1712e-02, -6.5026e-01,\n",
       "         7.1479e-02,  1.7572e-02,  6.0701e-01, -7.1990e-01, -4.4219e-01,\n",
       "        -3.7819e-01,  9.2039e-01, -3.3151e-01,  6.2271e-01, -1.8273e-01,\n",
       "         1.1607e+00,  3.7207e-01, -5.5046e-01, -2.6838e-01, -2.0603e-01,\n",
       "         2.8447e-01, -1.9734e-02,  1.7821e-01, -1.1237e+00, -4.2701e-01,\n",
       "        -3.5847e-01,  8.2271e-02,  1.2566e+00, -3.5841e-01,  6.7053e-02,\n",
       "         1.0599e-01, -7.3714e-01, -4.6438e-01, -8.4153e-01,  6.8558e-01,\n",
       "        -2.6604e-01, -5.1677e-01,  6.6214e-01, -9.6659e-02, -1.7596e-01,\n",
       "         7.5727e-01, -2.1660e-01,  1.0340e-01, -1.7546e-01, -6.7428e-01,\n",
       "         3.6900e-01, -1.5017e-01,  5.9047e-01, -2.9295e-01,  2.9905e-01,\n",
       "         5.0494e-01, -4.2101e-01, -2.6098e-01, -4.2132e-02,  2.1825e-01,\n",
       "        -4.4004e-01,  1.7718e-02,  5.7156e-01,  1.3456e-01, -8.2668e-01,\n",
       "         6.5094e+00,  1.0927e-01, -2.5078e-01, -1.7866e-01,  5.5295e-01,\n",
       "         6.8566e-01,  1.7459e-01,  5.9792e-01,  1.7598e-01,  5.0170e-01,\n",
       "        -5.3999e-01,  1.8807e-01,  5.6012e-01,  2.2692e-01,  1.1231e-01,\n",
       "        -5.0423e-01, -5.6196e-03,  7.6639e-01, -8.8837e-02, -1.3270e-01,\n",
       "         3.7701e-01,  2.8373e-01, -5.8450e-01,  4.0204e-01, -3.2476e-01,\n",
       "        -9.4485e-01, -4.9031e-01,  4.4709e-01, -5.4590e-01,  2.6906e-01,\n",
       "         2.3193e-01, -7.1843e-01,  8.5132e-02,  5.2217e-01, -1.8268e-01,\n",
       "         1.3116e-01,  9.8077e-02,  1.7771e-02, -3.7056e-01, -4.0124e-01,\n",
       "         4.0970e-01, -7.0785e-01,  6.9476e-02,  1.7988e+00, -3.3939e-01,\n",
       "         8.0354e-01, -3.4132e-01,  5.7550e-01,  4.1124e-01, -1.3411e+00,\n",
       "        -1.4147e-01, -1.3064e+00, -7.0122e-01, -4.4353e-01,  1.1855e+00,\n",
       "         5.2644e-02,  8.4064e-01,  3.6050e-01, -5.4817e-01,  3.5546e-01,\n",
       "        -8.2728e-02,  2.5156e+00, -4.5816e-01,  6.3514e-02, -1.2503e-01,\n",
       "        -1.0628e-01, -6.7487e-01,  4.0554e-01,  3.9324e-01, -5.8934e-02,\n",
       "        -3.2268e-01,  5.9373e-01, -1.7113e-01], grad_fn=<SelectBackward>)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Contextual vector of the token at position 1 of the first sequence, i.e. the character \"我\".\n",
    "sequence_output[0, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bd0bddca",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
